# Read in count matrix
counts <- read.delim("data/rsem.merged.gene_counts.tsv") %>%
mutate(gene_id = str_remove(gene_id, "\\..+")) %>%
select(-transcript_id.s.) %>%
filter(!duplicated(gene_id)) %>%
column_to_rownames("gene_id")
# Map ENSEMBL into symbols
symb <- clusterProfiler::bitr(rownames(counts),
fromType = "ENSEMBL", toType = "SYMBOL",
OrgDb = org.Hs.eg.db)
counts <- counts[symb$ENSEMBL, ]
# Read in metadata
meta <- read_excel("data/std_meta_Aus.xlsx")
# Set same names for sample names in the metadata and the count matrix
counts <- counts[meta$sample] # same order
rownames(meta) <- meta$sample
counts <- counts[meta$sample]
#all(colnames(counts) == rownames(meta))
# Create DESeq2 object
meta <- meta %>% mutate(diagnosis = str_remove(diagnosis, " |-"),
diagnosis = factor(diagnosis),
lobe = factor(lobe))
dds <- DESeqDataSetFromMatrix(countData = round(counts),
colData = meta,
design = ~ diagnosis + lobe)
# Prefilter
keep <- rowSums(counts(dds) >= 10) >= 5
dds <- dds[keep,]
The design was specified when creating the object.
dds <- DESeq(dds)
Extract normalized/transformed counts:
norm <- counts(dds, normalized = TRUE)
# Export normalized counts
write.csv(norm, "results/normalized_counts_DESeq2.csv", row.names = TRUE)
# Transformed variance: - for visualization
vsd <- vst(dds, blind = FALSE)
vsd <- vsd[, vsd$diagnosis %in% c("Control", "FCDIIb")]
# Get sample-to-sample distances
dist <- dist(t(assay(vsd)))
# Distance matrix
sampleDistMatrix <- as.matrix( dist )
#colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
annotation_rows = data.frame(
Diagnosis = vsd$diagnosis
)
rownames(annotation_rows) <- row.names(sampleDistMatrix)
ann_colors = list(
Diagnosis = c("Control" = "darkgreen", "FCDIIb" = "orange")
)
pheatmap(sampleDistMatrix,
clustering_distance_rows=dist,
clustering_distance_cols=dist,
annotation_row = NULL,
annotation_col = annotation_rows,
show_colnames = F,
show_rownames = F,
annotation_colors = ann_colors,
col=colors,
clustering_method = "complete"
)
# Export as svg
ggsave("results/sample_distance.svg",
width = 6,
height = 4,
dpi = 300)
res <- results(dds, contrast = c("diagnosis", "FCDIIb", "Control"))
All genes:
summary(res)
##
## out of 20987 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 892, 4.3%
## LFC < 0 (down) : 2684, 13%
## outliers [1] : 8, 0.038%
## low counts [2] : 6100, 29%
## (mean count < 126)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
#resultsNames(dds)
res <- res %>%
as.data.frame() %>%
rownames_to_column("gene") %>%
left_join(symb, by = c("gene" = "ENSEMBL")) %>%
select(gene, SYMBOL, log2FoldChange, pvalue, padj) %>%
mutate(across(c("log2FoldChange", padj), round, digits = 4))
DT::datatable(res, options = list(ordering = FALSE), filter = "top")
Significant changes (FC > 1.5):
sig <- res %>%
filter(padj < 0.1, abs(log2FoldChange) > log2(1.5)) %>%
arrange(desc(log2FoldChange))
sum(sig$log2FoldChange > 0)
## [1] 755
DT::datatable(sig, options = list(ordering = FALSE), filter = "top")
Export list:
setwd("~/Library/CloudStorage/GoogleDrive-icottagalvao@gmail.com/My Drive/Doutorado/Disciplinas/MO413 - Ciência de dados e visualização em saúde/ProjetoFinal")
# All genes with fold changes
write.csv(res, "results/all_DEGs_FCDIIbvsControl.csv", row.names = FALSE)
# Significant genes
write.csv(sig, "results/significant_DEGs_FCDIIbvsControl.csv", row.names = FALSE)
Volcano plot:
# Create new column if up/down regulated
res <- res %>%
mutate(Status = case_when(log2FoldChange > log2(1.5) & padj < 0.1 ~ "Up-regulated", log2FoldChange < -log2(1.5) & padj < 0.1 ~ "Down-regulated", TRUE ~ "Not significant"),
label = ifelse((padj < 0.1 & abs(log2FoldChange) > 2.5), SYMBOL, ""))
ggplot(res, aes(x = log2FoldChange, y = -log10(padj), color = Status, label = label)) +
geom_vline(xintercept = c(-log2(1.5), log2(1.5)), col = "gray", linetype = 'dashed') +
geom_hline(yintercept = -log10(0.1), col = "gray", linetype = 'dashed') +
geom_point() +
ggrepel::geom_text_repel(size = 2, color = "black")+
ylim(0, 5) +
scale_color_manual(values = c("Up-regulated"="red", "Down-regulated"="blue", "Not significant" = "black")) +
theme_classic()
# Export as svg
ggsave("results/volcano_plot.svg",
width = 6,
height = 4,
dpi = 300)
# Get just significantly up-regulated genes
up <- sig %>%
filter(log2FoldChange > 0, !is.na(SYMBOL))
write.csv(up,
"results/significantly_upregulated_genes.csv",
row.names = FALSE)
ego <- enrichGO(up$SYMBOL,
OrgDb = org.Hs.eg.db,
ont = "BP",
keyType = "SYMBOL",
pvalueCutoff = 0.05)
All functions:
# All functions
funs <- ego@result %>%
as.data.frame %>%
select(Description, GeneRatio, geneID)
DT::datatable(funs, options = list(ordering = FALSE), filter = "top")
Simplified:
ego.simp <- clusterProfiler::simplify(ego, cutoff = 0.5)
funs.simp <- ego.simp@result %>%
as.data.frame %>%
select(Description, GeneRatio, geneID)
DT::datatable(funs.simp, options = list(ordering = FALSE))
# Export simplified functions table
write.csv(funs.simp,
"results/functions.simp.csv")
This document was last rendered on: 2024-05-17 19:00:02.599245.
sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Sao_Paulo
## tzcode source: internal
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] pheatmap_1.0.12 RColorBrewer_1.1-3
## [3] readxl_1.4.3 lubridate_1.9.3
## [5] forcats_1.0.0 stringr_1.5.1
## [7] dplyr_1.1.4 purrr_1.0.2
## [9] readr_2.1.5 tidyr_1.3.1
## [11] tibble_3.2.1 ggplot2_3.5.0
## [13] tidyverse_2.0.0 clusterProfiler_4.10.1
## [15] DESeq2_1.42.1 SummarizedExperiment_1.32.0
## [17] MatrixGenerics_1.14.0 matrixStats_1.2.0
## [19] org.Hs.eg.db_3.18.0 edgeR_4.0.16
## [21] limma_3.58.1 GenomicFeatures_1.54.4
## [23] AnnotationDbi_1.64.1 Biobase_2.62.0
## [25] GenomicRanges_1.54.1 GenomeInfoDb_1.38.8
## [27] IRanges_2.36.0 S4Vectors_0.40.2
## [29] BiocGenerics_0.48.1
##
## loaded via a namespace (and not attached):
## [1] splines_4.3.3 BiocIO_1.12.0 bitops_1.0-7
## [4] ggplotify_0.1.2 filelock_1.0.3 cellranger_1.1.0
## [7] polyclip_1.10-6 XML_3.99-0.16.1 lifecycle_1.0.4
## [10] lattice_0.22-6 MASS_7.3-60.0.1 crosstalk_1.2.1
## [13] magrittr_2.0.3 sass_0.4.9 rmarkdown_2.26
## [16] jquerylib_0.1.4 yaml_2.3.8 cowplot_1.1.3
## [19] DBI_1.2.2 abind_1.4-5 zlibbioc_1.48.2
## [22] ggraph_2.2.1 RCurl_1.98-1.14 yulab.utils_0.1.4
## [25] tweenr_2.0.3 rappdirs_0.3.3 GenomeInfoDbData_1.2.11
## [28] enrichplot_1.22.0 ggrepel_0.9.5 tidytree_0.4.6
## [31] svglite_2.1.3 codetools_0.2-20 DelayedArray_0.28.0
## [34] DT_0.33 DOSE_3.28.2 xml2_1.3.6
## [37] ggforce_0.4.2 tidyselect_1.2.1 aplot_0.2.2
## [40] farver_2.1.1 viridis_0.6.5 BiocFileCache_2.10.2
## [43] GenomicAlignments_1.38.2 jsonlite_1.8.8 tidygraph_1.3.1
## [46] systemfonts_1.0.6 tools_4.3.3 progress_1.2.3
## [49] ragg_1.3.0 treeio_1.26.0 Rcpp_1.0.12
## [52] glue_1.7.0 gridExtra_2.3 SparseArray_1.2.4
## [55] xfun_0.43 qvalue_2.34.0 withr_3.0.0
## [58] fastmap_1.1.1 fansi_1.0.6 digest_0.6.35
## [61] timechange_0.3.0 R6_2.5.1 gridGraphics_0.5-1
## [64] textshaping_0.3.7 colorspace_2.1-0 GO.db_3.18.0
## [67] biomaRt_2.58.2 RSQLite_2.3.6 utf8_1.2.4
## [70] generics_0.1.3 data.table_1.15.4 rtracklayer_1.62.0
## [73] htmlwidgets_1.6.4 prettyunits_1.2.0 graphlayouts_1.1.1
## [76] httr_1.4.7 S4Arrays_1.2.1 scatterpie_0.2.2
## [79] pkgconfig_2.0.3 gtable_0.3.4 blob_1.2.4
## [82] XVector_0.42.0 shadowtext_0.1.3 htmltools_0.5.8.1
## [85] fgsea_1.28.0 scales_1.3.0 png_0.1-8
## [88] ggfun_0.1.4 knitr_1.45 rstudioapi_0.16.0
## [91] tzdb_0.4.0 reshape2_1.4.4 rjson_0.2.21
## [94] nlme_3.1-164 curl_5.2.1 cachem_1.0.8
## [97] parallel_4.3.3 HDO.db_0.99.1 restfulr_0.0.15
## [100] pillar_1.9.0 vctrs_0.6.5 dbplyr_2.5.0
## [103] evaluate_0.23 cli_3.6.2 locfit_1.5-9.9
## [106] compiler_4.3.3 Rsamtools_2.18.0 rlang_1.1.3
## [109] crayon_1.5.2 labeling_0.4.3 plyr_1.8.9
## [112] fs_1.6.3 stringi_1.8.3 viridisLite_0.4.2
## [115] BiocParallel_1.36.0 munsell_0.5.1 Biostrings_2.70.3
## [118] lazyeval_0.2.2 GOSemSim_2.28.1 Matrix_1.6-5
## [121] hms_1.1.3 patchwork_1.2.0 bit64_4.0.5
## [124] KEGGREST_1.42.0 statmod_1.5.0 highr_0.10
## [127] igraph_2.0.3 memoise_2.0.1 bslib_0.7.0
## [130] ggtree_3.10.1 fastmatch_1.1-4 bit_4.0.5
## [133] ape_5.7-1 gson_0.1.0